
* The project root is taken from the first positional argument of this do-file.
global root_dir = "`1'"

* Load the project configuration. The globals log_dir and alm_data_proc used
* further down are presumably defined there -- TODO confirm.
include "$root_dir/code/config/config.do"

* Open a named log for this step. The path is quoted so that a log directory
* containing spaces is still read as one filename. With -capture noisily-
* the script carries on even if the log cannot be opened.
cap noi log using "${log_dir}/7_count_patents_by_industry.log", replace name(dat)

* Normalise the optional positional arguments 2 to 5. The placeholder
* ___EMPTY___ stands for "argument not supplied" and is mapped to an empty
* string; the results are stored in the globals arg1 to arg4.
forvalues k = 1/4 {
    local pos = `k' + 1
    global arg`k' = cond("``pos''" == "___EMPTY___", "", "``pos''")
}

* A non-empty argument overrides the corresponding global. An empty argument
* leaves whatever value that global already holds untouched.
if "${arg1}" != "" {
    global weight_category "${arg1}"
    display "Weight category: ${weight_category}"
}

if "${arg2}" != "" {
    global weight_versions "${arg2}"
    display "Weight versions: ${weight_versions}"
}

if "${arg3}" != "" {
    global weight_window "${arg3}"
    display "Weight window: ${weight_window}"
}

if "${arg4}" != "" {
    global wtype "${arg4}"
}
* The weight type in effect is always echoed, whether or not it was overridden.
display "${wtype}"
capture noi {



* v24 6_count_patents

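/* This do-file generates the patent counts by industry (SIC4, ind6090 and beaind), using the IPC4-to-SIC4 mapping done in step 3.

Inputs:
- US_docdb_cipc.dta
- cw_ipc4_sic4_use.dta, cw_ipc4_sic4_mk.dta
- cw_sic4_ind6090.dta
- cw_sic4_beaind.dta

Outputs:
- ipc4_patents.dta, cipc6_patents.dta
- cw_ipc4_ind6090_use.dta, cw_ipc4_ind6090_mk.dta
- patents_sic4_use_ipc4.dta, patents_sic4_mk_ipc4.dta
- patents_ind6090_use_ipc4.dta, patents_ind6090_mk_ipc4.dta
- patents_beaind_use_ipc4.dta, patents_beaind_mk_ipc4.dta
*/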

* ----------------------------------------
* A. Count patents by C/IPC or IPC4 - year
* -----------------------------------------

* Build the class-by-year patent counts twice: once keyed on the 4-character
* code (ipc4) and once keyed on cipc6. One file is saved per version.
foreach cvers in ipc4 cipc6 {
	* Load the granted USPTO patents together with their cipc6 codes
	use ${alm_data_proc}/US_docdb_cipc.dta, clear

	if "`cvers'" == "ipc4" {
		* Shorten the cipc6 code to its first four characters
		generate ipc4 = substr(cipc6, 1, 4)
		* Remove the IPC4 codes that do not occur in the concordance table of
		* IPC4 and SIC industry of use. Those patents are assumed to be
		* assigned through their co-IPCs, so they should not enter the share
		* calculation. inlist() accepts at most ten string arguments, hence
		* the three separate calls.
		drop if inlist(ipc4,"A99Z","B68C","B68F","B82T","B99Z","C12J","C13D","C13F","C13G") | ///
			inlist(ipc4,"E02C","E99Z","F21H","F99Z","G10B","G10F","G21J","G99Z","H04T") | ///
			inlist(ipc4,"H99Z")
	}

	* Fractional assignment: dupl_docdb counts the other rows sharing the same
	* docdb_family_id, so every row of a family gets weight 1 over the number
	* of rows in that family
	duplicates tag docdb_family_id, generate(dupl_docdb)
	generate fraction_docdb = 1/(dupl_docdb + 1)

	* Scale each technology indicator by the fraction and create a second
	* version (suffix _b) that is additionally multiplied by the variable bia
	local base_techs auto90 auto95 in_relevant_field pauto90 pauto95 pauto90_rm6
	foreach tech of local base_techs {
		replace `tech' = `tech' * fraction_docdb
		generate `tech'_b = bia * `tech'
	}

	* Restrict to application years 1970-2004 and sum within class and year
	keep if inrange(appln_year, 1970, 2004)
	local sum_vars auto90 auto90_b auto95 auto95_b pauto90 pauto90_b pauto95 pauto95_b pauto90_rm6 pauto90_rm6_b in_relevant_field in_relevant_field_b
	collapse (sum) `sum_vars', by(`cvers' appln_year)
	save ${alm_data_proc}/`cvers'_patents.dta, replace
}

* TODO: count and aggregate the main regression patents!


* ---------------------------------------------------------------
* C. IPC4 DHOZ "OLD" Mapping
* ---------------------------------------------------------------

* for old mapping, use direct cw's

* Apply sic4 - ind6090 cw to go to ind6090 level

* Build an ipc4 to ind6090 crosswalk for each mapping version (use, mk) by
* chaining the ipc4-sic4 crosswalk with the sic4-ind6090 crosswalk.
foreach indvers in use mk {

	use "${alm_data_proc}/cw_ipc4_sic4_`indvers'.dta", clear
	ren sic4_`indvers' sic4
	* Attach the ind6090 codes and their weight to every sic4
	mmerge sic4 using "${alm_data_proc}/cw_sic4_ind6090.dta", unmatched(master)
	ren ind6090 ind6090_`indvers'
	* Every sic4 of the ipc4 crosswalk must be matched. The merge indicator is
	* spelled out in full (not abbreviated) so that the check also works when
	* variable abbreviation is switched off.
	assert _merge == 3
	drop _merge
	* Combined weight is the product of the two crosswalk weights
	gen w_ipc4_ind6090_`indvers' = w_ipc4_sic4_`indvers' * weight
	keep ipc4 ind6090_`indvers' w_ipc4_ind6090_`indvers'

	* make sure the weights sum up to one within each ipc4
	bys ipc4: egen w_total = total(w_ipc4_ind6090_`indvers')
	replace w_ipc4_ind6090_`indvers' =  w_ipc4_ind6090_`indvers' / w_total
	* NOTE(review): exact duplicate rows are removed only after the
	* normalisation, so if any exist the remaining weights of that ipc4 sum to
	* less than one -- confirm that none are expected here. The helper
	* variable w_total also stays in the saved file.
	duplicates drop
	sort ipc4 ind6090_`indvers'
	save "${alm_data_proc}/cw_ipc4_ind6090_`indvers'.dta", replace
}


* Count technologies by industry: spread the ipc4-year patent counts over each
* industry classification (sic4 and ind6090, versions use and mk) with the
* weights of the matching ipc4 crosswalk, then sum by industry and year.
local tech_counts auto90 auto90_b auto95 auto95_b pauto90 pauto90_b pauto95 pauto95_b pauto90_rm6 pauto90_rm6_b in_relevant_field in_relevant_field_b
foreach indvers in sic4_use sic4_mk ind6090_use ind6090_mk {

	use ${alm_data_proc}/ipc4_patents.dta, clear
	* NOTE(review): both files can hold several rows per ipc4 (years on one
	* side, industries on the other) -- confirm that mmerge forms every
	* pairing within an ipc4 here.
	mmerge ipc4 using ${alm_data_proc}/cw_ipc4_`indvers'.dta, unmatched(both)
	* Every ipc4 has to be present in both the counts and the crosswalk
	assert _merge == 3
	drop _m
	* Weight each count by the share of the ipc4 assigned to the industry
	foreach tech of local tech_counts {
		replace `tech' = `tech' * w_ipc4_`indvers'
	}
	collapse (sum) `tech_counts', by(`indvers' appln_year)
	sort `indvers' appln_year
	save ${alm_data_proc}/patents_`indvers'_ipc4.dta, replace
}



* ---------------------------------------------------------------
* D. Collapse sic-level to beaind
* ---------------------------------------------------------------


* Aggregate the sic4-level patent counts to the beaind level for each mapping
* version (use, mk). Only the ipc4-based counts are processed.
local cvers ipc4
foreach indvers in use mk {
	use "${alm_data_proc}/patents_sic4_`indvers'_`cvers'.dta", clear
	ren sic4_`indvers' sic4
	mmerge sic4 using "${alm_data_proc}/cw_sic4_beaind.dta", unmatched(both)
	* Report the match pattern, then keep only the sic4 codes found in both
	* files. The merge indicator is spelled out in full so that these lines
	* also work when variable abbreviation is switched off.
	tab _merge
	keep if _merge == 3
	drop _merge
	* pauto90_rm6 and pauto90_rm6_b are summed as well, so that the beaind
	* files carry the same technology variables as the sic4-level input
	* instead of silently losing them in the collapse.
	collapse (sum) auto90 auto90_b auto95 auto95_b pauto90 pauto90_b pauto95 pauto95_b pauto90_rm6 pauto90_rm6_b in_relevant_field in_relevant_field_b, by (beaind appln_year)
	sort beaind appln_year
	ren beaind beaind_`indvers'
	* The output name is built from the same cvers local as the input name
	save "${alm_data_proc}/patents_beaind_`indvers'_`cvers'.dta", replace
}


}
* _rc holds the return code of the capture block that ends just above.
* Store it first, then report whether that block ran through without error.
local run_rc = _rc
if `run_rc' != 0 {
    display "Execution finished with errors."
}
else {
    display "Execution finished successfully."
}

* Close the named log; capture keeps this quiet if the log is not open.
capture log close dat